#!/usr/bin/env python
# coding=utf-8

"""
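Send HTTP requests with optional proxy and User-Agent support; cookies set by
a login (and similar session state) are handled automatically.


Example
proxy_conf={'ip':'192.168.31.121','port':27017,'db':'crawler_proxy','collection':'proxy'}
request_util = RequestUtil(proxy_conf=proxy_conf)
request_util is a singleton, so cookies obtained after logging in are kept and reused automatically
response = request_util.make_request('http://www.baidu.com')

Modified test version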
"""

import requests
import random
from pymongo import MongoClient

# Pool of browser User-Agent strings; RequestUtil.change_ua() picks one at
# random when no explicit value is given.
# Every value is sent verbatim as an HTTP header, so entries must not carry
# stray quotes or surrounding whitespace.
# NOTE(review): a few entries are repeated, which weights random.choice toward
# them; they are kept so the list length and ordering stay unchanged.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0) Gecko/20121026 Firefox/16.0',
    'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-CN; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10',
    'Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1',
]


class Singleton(type):
    """Metaclass that gives every class using it a single shared instance.

    The first call to such a class constructs the instance normally; every
    later call returns that same object, and the arguments of those later
    calls are ignored.
    """

    def __init__(cls, name, bases, dict):
        # Each class created through this metaclass gets its own empty slot,
        # so subclasses do not share an instance with their parents.
        cls._instance = None
        super(Singleton, cls).__init__(name, bases, dict)

    def __call__(cls, *args, **kwargs):
        """Return the cached instance, building it on first use."""
        if cls._instance is not None:
            return cls._instance
        created = super(Singleton, cls).__call__(*args, **kwargs)
        cls._instance = created
        return created


class RequestUtil(object):
    """Helper for sending HTTP requests through one shared ``requests.Session``.

    The session keeps cookies between calls. Optionally, a pool of HTTP
    proxies is loaded from a MongoDB collection and one is picked at random
    for each proxied request.

    The class is created through the ``Singleton`` metaclass, so only the
    first construction runs ``__init__``; later calls return the same object
    and their arguments are ignored.
    NOTE: ``__metaclass__`` is the Python 2 spelling and is ignored by
    Python 3.
    """
    __metaclass__ = Singleton

    def __init__(self, headers=None, proxy_conf=None):
        """Create the session and, when configured, load the proxy pool.

        :param headers: dict of HTTP headers used for every request. It
            replaces the session's default header mapping and is later
            modified in place by ``change_ua``. Defaults to a new empty dict.
        :param proxy_conf: dict describing where proxies are stored, with
            keys ``ip``, ``db``, ``collection`` and optional ``port``
            (27017 when missing). Falsy means "no proxy pool".
        :raises ValueError: if ``proxy_conf`` is given but incomplete.
        :raises RuntimeError: if the proxies cannot be read from the database.
        """
        self.session = requests.Session()

        # A fresh dict per instance instead of a shared mutable default.
        self.headers = {} if headers is None else headers

        # The session shares this very dict, so change_ua() takes effect on
        # the session without any extra copying.
        self.session.headers = self.headers
        self.proxy_collection = None
        self.all_proxy = []

        self._init_proxy(proxy_conf)
        # Proxy mapping used by the most recent request ({} means no proxy).
        self.current_proxy = {}

    def _init_proxy(self, proxy_conf):
        """Connect to the proxy collection described by ``proxy_conf``.

        Does nothing when ``proxy_conf`` is falsy. All settings are validated
        before the MongoDB client is created.

        :raises ValueError: if ``ip``, ``db`` or ``collection`` is missing.
        :raises RuntimeError: if loading the proxies fails.
        """
        if not proxy_conf:
            return

        ip = proxy_conf.get('ip')
        if not ip:
            raise ValueError(u"proxy ip is None")
        db_name = proxy_conf.get('db')
        if not db_name:
            raise ValueError(u'db is None')
        collection_name = proxy_conf.get('collection')
        if not collection_name:
            raise ValueError(u'collection is None')

        # The key was previously misspelled, so a configured port was never
        # honoured; fall back to MongoDB's default port when it is absent.
        port = proxy_conf.get('port') or 27017

        client = MongoClient(ip, port)
        self.proxy_collection = client[db_name][collection_name]
        self.fresh_proxy()

    def change_header(self, headers):
        """Replace the headers used for all following requests.

        :param headers: dict that becomes the session's header mapping.
        """
        # Keep self.headers and the session pointing at the same dict;
        # otherwise a later change_ua() would update a stale dict that the
        # session no longer uses.
        self.headers = headers
        self.session.headers = headers
        # Diagnostic output kept from the original implementation; this form
        # prints the same text under Python 2 and Python 3.
        print("++++++++ %s" % (self.session.headers,))

    def fresh_proxy(self):
        """Reload the proxy pool from the database.

        The pool is rebuilt from scratch on every call, so refreshing does
        not accumulate duplicate entries. ``all_proxy`` is only replaced once
        the whole query has succeeded.

        :raises RuntimeError: if no collection is configured or the query fails.
        """
        # pymongo collections do not support truth-value testing, so the
        # comparison with None must be explicit.
        if self.proxy_collection is None:
            raise RuntimeError(u'get proxy form db faild: no proxy collection configured')
        try:
            # presumably each document's _id is a "host:port" string -- TODO confirm
            # NOTE(review): only the 'http' scheme is mapped, so https URLs
            # are likely not routed through the proxy -- confirm intent.
            loaded = [{'http': "http://" + row['_id']}
                      for row in self.proxy_collection.find()]
        except Exception as exc:
            # Keep the underlying cause visible instead of discarding it.
            raise RuntimeError(u'get proxy form db faild: %r' % (exc,))
        self.all_proxy = loaded

    def change_ua(self, useragent=''):
        """Set the ``User-Agent`` header.

        :param useragent: value to use; when empty, a random entry of
            ``USER_AGENTS`` is chosen.
        """
        self.headers['User-Agent'] = useragent or random.choice(USER_AGENTS)

    def make_request(self, url, method="get", data=None, timeout=80, isproxy=False):
        """Send a request and return the ``requests`` response.

        :param url: target URL.
        :param method: ``"get"`` or ``"post"`` (case-insensitive).
        :param data: query parameters for GET, form body for POST.
        :param timeout: timeout in seconds passed to ``requests``.
        :param isproxy: True to route this request through a randomly chosen
            proxy from the pool; False to send it without one.
        :raises ValueError: if ``method`` is neither get nor post.
        :raises RuntimeError: if ``isproxy`` is True but the pool is empty.
        """
        verb = str(method).lower()
        if verb not in ("get", "post"):
            raise ValueError('request method is wrong')

        if isproxy:
            if not self.all_proxy:
                raise RuntimeError(u'there in no proxy db onfig or count of ip in db in 0')
            self.current_proxy = random.choice(self.all_proxy)
        else:
            # Forget the proxy of an earlier proxied call; without this a
            # request with isproxy=False would silently keep using it.
            self.current_proxy = {}

        payload = {} if data is None else data
        if verb == "get":
            return self.session.get(url, timeout=timeout, params=payload,
                                    proxies=self.current_proxy)
        return self.session.post(url, timeout=timeout, data=payload,
                                 proxies=self.current_proxy)


if __name__ == '__main__':
    # Manual smoke test: load proxies from MongoDB and fetch a page through one.
    proxy_conf = {'ip': '192.168.31.121', 'port': 27017, 'db': 'crawler_proxy', 'collection': 'proxy'}
    # The first positional parameter of RequestUtil is `headers`, so the proxy
    # settings must be passed by keyword; passed positionally they would be
    # used as HTTP headers and no proxy pool would be loaded.
    request_util = RequestUtil(proxy_conf=proxy_conf)
    # request_util is a singleton, so cookies obtained after a login are kept
    # on its session for later requests.
    response = request_util.make_request('http://www.baidu.com', isproxy=True)
    print(response.content)
